# 1. Fetch the page source.
# 2. Parse the file returned by the server with etree.HTML.
# 3. Print the result.
import urllib.request

from lxml import etree

url = 'https://www.baidu.com/'

# Send a browser-like User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 '
                  'Safari/537.36'
}

# Build a customized request object that carries the headers.
request = urllib.request.Request(url, headers=headers)

# Send the request. The context manager closes the HTTP response once the
# body has been read, and the timeout keeps a stalled connection from
# blocking the script forever.
with urllib.request.urlopen(request, timeout=10) as response:
    # Read the raw page source and decode it as UTF-8 text.
    content = response.read().decode('utf-8')

# Print the page source.
print(content)

# Parse the server response into an element tree.
tree = etree.HTML(content)
# Print the `value` attribute of every <input> whose id contains "su".
print(tree.xpath('//input[contains(@id,"su")]/@value'))
